####################################################################################
## Copy the RNA data into ${mRNA_path}.
## TMM matrix, all samples
cp -rf \
  ~/20220915_gastric_multiple/rna_combine/analysis/images/TMM/CombineCounts.FilterLowExpression.TMM.tsv \
  ${mRNA_path}/CombineCounts.FilterLowExpression.TMM.tsv

## TMM matrix taken from the DiffGene directory (the "MergeMutiSample" file)
cp -rf \
  ~/20220915_gastric_multiple/rna_combine/analysis/images/DiffGene/CombineCounts.FilterLowExpression-MergeMutiSample.TMM.tsv \
  ${mRNA_path}/CombineCounts.FilterLowExpression-MergeMutiSample.TMM.tsv

## TPM
cp -rf \
  ~/20220915_gastric_multiple/rna_combine/analysis/RSEM/CombineTPM.FilterLowExpression.tsv \
  ${mRNA_path}/CombineTPM.FilterLowExpression.tsv

####################################################################################
## 20230205
## Run combineCounts.R on the NJMU and TCGA counts. Author's notes on the step:
## 1. In both the TCGA and the NJMU data, low-expression genes (median TPM < 1) are removed
## 2. Produces the batch-effect-removed TCGA + NJMU expression matrix
##    CombineCounts.TCGA_NJMU.FilterLowExpression.TMM.tsv
## 3. Produces the TCGA-only expression matrix TCGA.FilterLowExpression.TMM.tsv
## 4. Produces a PCA plot; no obvious batch effect between the two sample groups
## TCGA Normal samples are removed
${Rscript} ${scripts_path}/mRNA/combineCounts.R \
  --sample_list_file ~/20220915_gastric_multiple/rna_combine/analysis/config/tumor_normal.list \
  --rsem_file ~/20220915_gastric_multiple/rna_combine/analysis/RSEM/CombineCounts.FilterLowExpression.tsv \
  --sample_list_public_file ${work_dir}/public_ref/combine/MutationInfo.combine.tsv \
  --raw_tcga_count_file ${work_dir}/public_ref/TCGA/TCGA-STAD.RSEM_counts.tsv \
  --gtf_file ${ref_path}/GTF/gencode.v19.ensg_genename.txt \
  --gtf_for_len_file ${ref_path}/GTF/gencode.v19.annotation.gtf \
  --out_path ${mRNA_path}

## Extract the MSS samples in use, NJMU and TCGA.
## Normal samples are not included.
## Author's notes on the outputs:
## 1. CombineTMM.DNAUse.NJMU_TCGA.MergeMutiSample.tsv: NJMU multi-samples merged,
##    used to analyse expression changes
## 2. CombineTMM.DNAUse.NJMU_TCGA.tsv: the NJMU samples in use,
##    used to analyse mutations
${Rscript} ${scripts_path}/mRNA/getDnaSample.R \
  --sample_list_file ${config_path}/tumor_normal.class.list \
  --sample_list_public_file ${work_dir}/public_ref/combine/MutationInfo.combine.tsv \
  --tmm_file ${mRNA_path}/CombineCounts.FilterLowExpression.TMM.tsv \
  --tmm_combine_file ${mRNA_path}/CombineCounts.FilterLowExpression-MergeMutiSample.TMM.tsv \
  --tmm_combinepublic_file ${mRNA_path}/CombineCounts.TCGA_NJMU.FilterLowExpression.TMM.tsv \
  --out_path ${mRNA_path}

## Extract TPM values for the NJMU and TCGA samples in use.
## Author's note on the output:
## 1. CombineTPM.DNAUse.NJMU_TCGA.tsv, used for scissor
## The TPM matrices are passed through the script's --tmm_* options.
${Rscript} ${scripts_path}/mRNA/getDnaSample.TPM.R \
  --sample_list_file ${config_path}/tumor_normal.class.list \
  --sample_list_public_file ${work_dir}/public_ref/combine/MutationInfo.combine.tsv \
  --tmm_file ${mRNA_path}/CombineTPM.FilterLowExpression.tsv \
  --tmm_combinepublic_file ${mRNA_path}/TCGA.FilterLowExpression.TPM.tsv \
  --out_path ${mRNA_path}

####################################################################################
## 20230926
## Extract all NJMU and TCGA samples, MSS and MSI, that have expression data.
## Normal samples are kept.
## TMM; the result goes to ${mRNA_path}/ShareData under the name given by --out_file.
${Rscript} ${scripts_path}/mRNA/getDnaSample.MSS_MSI.TCGA_NJMU.R \
  --sample_list_file ${config_path}/tumor_normal.class.MSS_MSI.list \
  --sample_list_public_file ${work_dir}/public_ref/combine/MutationInfo.combine.tsv \
  --tmm_file ${mRNA_path}/CombineCounts.FilterLowExpression.TMM.tsv \
  --tmm_combinepublic_file ${mRNA_path}/CombineCounts.TCGA_NJMU.FilterLowExpression.TMM.tsv \
  --out_path ${mRNA_path}/ShareData \
  --out_file CombineTMM.DNAUse.NJMU_TCGA.MergeMutiSample.tsv

## TPM counterpart of the extraction above: same sample lists, TPM matrices
## as input, result written to ${mRNA_path}/ShareData.
${Rscript} ${scripts_path}/mRNA/getDnaSample.TPM.MSS_MSI.TCGA_NJMU.R \
  --sample_list_file ${config_path}/tumor_normal.class.MSS_MSI.list \
  --sample_list_public_file ${work_dir}/public_ref/combine/MutationInfo.combine.tsv \
  --tmm_file ${mRNA_path}/CombineTPM.FilterLowExpression.tsv \
  --tmm_combinepublic_file ${mRNA_path}/TCGA.FilterLowExpression.TPM.tsv \
  --out_path ${mRNA_path}/ShareData \
  --out_file CombineTPM.DNAUse.NJMU_TCGA.MergeMutiSample.tsv

## Baseline information for the samples in use.
## Writes ShareData/SampleInfo.tsv: the header line of the annotation table
## (matched through "Molecular.subtype") plus every line that mentions one of
## the IDs found in the header of the shared TMM matrix.
##
## use_sample is an ERE alternation "ID1|ID2|...": each column name of the
## matrix header is cut at its first "_" and the results are de-duplicated.
## Column names containing "gene" are dropped, as in the other sections of
## this script, so the gene-ID column does not become part of the regex.
use_sample=$(
  head -n 1 "${mRNA_path}/ShareData/CombineTMM.DNAUse.NJMU_TCGA.MergeMutiSample.tsv" \
    | tr '\t' '\n' \
    | grep -v gene \
    | awk -F'_' '{print $1}' \
    | sort -u \
    | tr '\n' '|' \
    | sed 's/|$//'
)

## The "|ID..." part is appended only when the ID list is non-empty: a
## trailing "|" would add an empty alternative that matches every line and
## would silently copy the whole annotation table.
grep -E "Molecular.subtype${use_sample:+|${use_sample}}" \
  "${work_dir}/public_ref/combine/MutationInfo.combine.addMolecularSubType.Race.tsv" \
  > "${mRNA_path}/ShareData/SampleInfo.tsv"


####################################################################################
## All TMM data for NJMU
## MSS + MSI
${Rscript} ${scripts_path}/mRNA/getDnaSample.MSS_MSI.R \
  --sample_list_file ${config_path}/tumor_normal.class.MSS_MSI.list \
  --sample_list_public_file ${work_dir}/public_ref/combine/MutationInfo.combine.tsv \
  --tmm_file ${mRNA_path}/CombineCounts.FilterLowExpression.TMM.tsv \
  --tmm_combine_file ${mRNA_path}/CombineCounts.FilterLowExpression-MergeMutiSample.TMM.tsv \
  --tmm_combinepublic_file ${mRNA_path}/CombineCounts.TCGA_NJMU.FilterLowExpression.TMM.tsv \
  --out_path ${mRNA_path}

## 53 patients, 157 samples + 41 Normal
## Symlink (replacing any existing link) under a descriptive name.
ln -snf \
  ${mRNA_path}/CombineTMM.DNAUse.MSS_MSI.tsv \
  ${mRNA_path}/NJSCC.DNAUse.53_157.tsv

####################################################################################

## NJMU.* aliases for the CombineTMM.DNAUse matrices (merged and unmerged).
ln -snf \
  ${mRNA_path}/CombineTMM.DNAUse.MergeMutiSample.tsv \
  ${mRNA_path}/NJMU.FilterLowExpression.MergeMutiSample.TMM.tsv
ln -snf \
  ${mRNA_path}/CombineTMM.DNAUse.tsv \
  ${mRNA_path}/NJMU.FilterLowExpression.TMM.tsv

####################################################################################
## RNA sample attrition (bookkeeping notes; the commands below are kept
## commented out for reference)
## 1. RNA sequenced in total: 55 patients, 207 samples
#cat ${mRNA_path}/CombineCounts.FilterLowExpression.TMM.tsv | head -1 | tr '\t' '\n' | grep -v "gene_id" > ${mRNA_path}/raw_njmu_sample.list
# samples overlapping with DNA
#dna_sample=`cat ${config_path}/tumor_normal.class.MSS_MSI.list | awk -F'\t' '{print $1}' | sort -u | grep -v ID | tr '\n' '|' | sed 's/|$//'`
#cat ${mRNA_path}/raw_njmu_sample.list | grep -v -E ${dna_sample}

## 2. Intersect with the samples that passed DNA QC: 53 patients, 198 samples
##    (removes 2 patients / 6 samples, plus the IGC-1 sample of one patient)
# cat ${mRNA_path}/raw_njmu_sample.list | grep -E ${dna_sample} | grep -v "JZGC00576_IGC-1" | wc -l


## List of the samples removed in step 2, kept as an inert block note.
## The note inside reads: fewer than 120 mutations, purity below 0.1
## (patients JZGC00762 and JZGC00750); JZGC00576_IGC-1 is listed separately.
## The here-document is fed to the no-op ":" with a quoted delimiter, so the
## text is never expanded and no shell tries to run a default command on it.
: <<'EOF'
## 突变数量不到120个，纯度低于0.1
## JZGC00762|JZGC00750
JZGC00750_IM-1
JZGC00750_IM-2
JZGC00750_IGC-1
JZGC00750_Normal
JZGC00762_IM-1
JZGC00762_IM-2
JZGC00762_DGC-1
JZGC00762_Normal

JZGC00576_IGC-1
EOF

## 3. Drop the Normal samples: 53 patients, 157 samples
# cat ${mRNA_path}/raw_njmu_sample.list | grep -E ${dna_sample} | grep -v "JZGC00576_IGC-1" | grep -v Normal | wc -l

## 4. Drop the MSI samples: 48 patients, 142 samples
# dna_mss_sample=`cat ${config_path}/tumor_normal.class.list | awk -F'\t' '{print $1}' | sort -u | grep -v ID | tr '\n' '|' | sed 's/|$//'`
# cat ${mRNA_path}/raw_njmu_sample.list | grep -E ${dna_mss_sample} | grep -v Normal

## Print to stdout, one per line, the part before the first "_" of every
## header column of the matrix, skipping names that contain "gene" and
## prefixes that contain "TCGA". Duplicates are not removed.
head -n 1 ${mRNA_path}/CombineTMM.DNAUse.NJMU_TCGA.tsv \
  | tr '\t' '\n' \
  | grep -v gene \
  | cut -d '_' -f 1 \
  | grep -v TCGA

####################################################################################
## Overall
## Record the number of samples per class in Sample_Record.All.csv.

## ERE alternation "ID1|ID2|..." built from the matrix header: column names
## containing "gene" are dropped, each remaining name is cut at its first "_",
## and the results are de-duplicated.
use_sample=$(
  head -n 1 ${mRNA_path}/CombineTMM.DNAUse.NJMU_TCGA.tsv \
    | tr '\t' '\n' \
    | grep -v gene \
    | cut -d '_' -f 1 \
    | sort -u \
    | tr '\n' '|' \
    | sed 's/|$//'
)

## Each count below is the number of distinct first-column values among the
## table rows that match one of the IDs as a whole word.

## Number of IGC samples (rows mentioning IGC but not DGC)
IM_igc_sample_num=$(
  cat ${work_dir}/public_ref/combine/MutationInfo.combine.tsv \
    | grep -E -w ${use_sample} \
    | grep "IGC" \
    | grep -v "DGC" \
    | cut -f 1 \
    | sort -u \
    | wc -l
)

## Number of IM + DGC samples (rows mentioning DGC but not IGC)
IM_dgc_sample_num=$(
  cat ${work_dir}/public_ref/combine/MutationInfo.combine.tsv \
    | grep -E -w ${use_sample} \
    | grep "DGC" \
    | grep -v "IGC" \
    | cut -f 1 \
    | sort -u \
    | wc -l
)

## Number of IM + IGC + DGC samples
IM_igc_dgc_sample_num=$(
  cat ${work_dir}/public_ref/combine/MutationInfo.combine.tsv \
    | grep -E -w ${use_sample} \
    | grep "IGC + DGC" \
    | cut -f 1 \
    | sort -u \
    | wc -l
)

## Write the table under a scratch name, then move it to its final name.
{
  echo "Class,Num"
  echo "IM + IGC,${IM_igc_sample_num}"
  echo "IM + DGC,${IM_dgc_sample_num}"
  echo "IM + IGC + DGC,${IM_igc_dgc_sample_num}"
} > ${mRNA_path}/Sample_Record.csv

mv ${mRNA_path}/Sample_Record.csv ${mRNA_path}/Sample_Record.All.csv

####################################################################################
## NJMU
## Record the number of samples per class in Sample_Record.NJMU.csv.

## ERE alternation "ID1|ID2|..." built from the header of the combined
## TCGA + NJMU matrix: column names containing "gene" are dropped, each
## remaining name is cut at its first "_", and the results are de-duplicated.
use_sample=$(
  head -n 1 ${mRNA_path}/CombineCounts.TCGA_NJMU.FilterLowExpression.TMM.tsv \
    | tr '\t' '\n' \
    | grep -v gene \
    | cut -d '_' -f 1 \
    | sort -u \
    | tr '\n' '|' \
    | sed 's/|$//'
)

## Each count below is the number of distinct first-column values among the
## rows of the class list that match one of the IDs as a whole word.

## Number of IM + IGC samples (rows without DGC)
IM_igc_sample_num=$(
  cat ${config_path}/tumor_normal.class.MSS_MSI.list \
    | grep -E -w ${use_sample} \
    | grep "IM + IGC" \
    | grep -v "DGC" \
    | cut -f 1 \
    | sort -u \
    | wc -l
)

## Number of IM + DGC samples
IM_dgc_sample_num=$(
  cat ${config_path}/tumor_normal.class.MSS_MSI.list \
    | grep -E -w ${use_sample} \
    | grep "IM + DGC" \
    | cut -f 1 \
    | sort -u \
    | wc -l
)

## Number of IM + IGC + DGC samples
IM_igc_dgc_sample_num=$(
  cat ${config_path}/tumor_normal.class.MSS_MSI.list \
    | grep -E -w ${use_sample} \
    | grep "IM + IGC + DGC" \
    | cut -f 1 \
    | sort -u \
    | wc -l
)

## Write the table under a scratch name, then move it to its final name.
{
  echo "Class,Num"
  echo "IM + IGC,${IM_igc_sample_num}"
  echo "IM + DGC,${IM_dgc_sample_num}"
  echo "IM + IGC + DGC,${IM_igc_dgc_sample_num}"
} > ${mRNA_path}/Sample_Record.csv

mv ${mRNA_path}/Sample_Record.csv ${mRNA_path}/Sample_Record.NJMU.csv



####################################################################################
## TCGA
## Record the number of samples per class in Sample_Record.TCGA.csv.

## ERE alternation "ID1|ID2|..." built from the header of the TCGA matrix:
## column names containing "gene" are dropped, each remaining name is cut at
## its first "_", and the results are de-duplicated.
use_sample=$(
  head -n 1 "${mRNA_path}/TCGA.FilterLowExpression.TMM.tsv" \
    | tr '\t' '\n' \
    | grep -v gene \
    | awk -F'_' '{print $1}' \
    | sort -u \
    | tr '\n' '|' \
    | sed 's/|$//'
)

## Counts default to 0 so that an empty ID list (missing or empty matrix)
## gives an explicit warning instead of a grep usage error.
IM_igc_sample_num=0
IM_dgc_sample_num=0
IM_igc_dgc_sample_num=0

if [ -n "${use_sample}" ]; then
  ## Each count is the number of distinct first-column values among the table
  ## rows that match one of the IDs as a whole word. The three classes are
  ## kept mutually exclusive, as in the overall record written earlier in
  ## this script: an "IGC + DGC" row is counted only in the third class.

  ## Number of IGC samples (rows mentioning IGC but not DGC)
  IM_igc_sample_num=$(
    grep -E -w -e "${use_sample}" "${work_dir}/public_ref/combine/MutationInfo.combine.tsv" \
      | grep "IGC" \
      | grep -v "DGC" \
      | awk -F'\t' '{print $1}' \
      | sort -u \
      | wc -l
  )

  ## Number of IM + DGC samples (rows mentioning DGC but not IGC)
  IM_dgc_sample_num=$(
    grep -E -w -e "${use_sample}" "${work_dir}/public_ref/combine/MutationInfo.combine.tsv" \
      | grep "DGC" \
      | grep -v "IGC" \
      | awk -F'\t' '{print $1}' \
      | sort -u \
      | wc -l
  )

  ## Number of IM + IGC + DGC samples
  IM_igc_dgc_sample_num=$(
    grep -E -w -e "${use_sample}" "${work_dir}/public_ref/combine/MutationInfo.combine.tsv" \
      | grep "IGC + DGC" \
      | awk -F'\t' '{print $1}' \
      | sort -u \
      | wc -l
  )
else
  echo "WARNING: no sample IDs read from ${mRNA_path}/TCGA.FilterLowExpression.TMM.tsv; all class counts are 0" >&2
fi

## Write the table under a scratch name, then move it to its final name.
{
  echo "Class,Num"
  echo "IM + IGC,${IM_igc_sample_num}"
  echo "IM + DGC,${IM_dgc_sample_num}"
  echo "IM + IGC + DGC,${IM_igc_dgc_sample_num}"
} > "${mRNA_path}/Sample_Record.csv"

mv "${mRNA_path}/Sample_Record.csv" "${mRNA_path}/Sample_Record.TCGA.csv"



####################################################################################
